import pandas as pd
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from rdkit import Chem
from rdkit.Chem import Draw
from pandas_profiling import ProfileReport
OpenFoodTox is a structured database summarising the outcomes of hazard identification and characterisation for the human health (all regulated products and contaminants), the animal health (feed additives, pesticides and contaminants) and the environment (feed additives and pesticides).
OpenFoodTox provides information on the substance characterisation, the links to EFSA’s related output, background European legislation, and a summary of the critical toxicological endpoints and reference values.
This table describes the chemical identity of each substance, including its CAS number and structure (SMILES).
# Load the substance-characterisation table from the local project checkout.
# NOTE(review): the path is absolute and machine-specific.
SubCharacter = pd.read_csv(
    filepath_or_buffer='/Users/suongsuong/Documents/GitHub/Data400_Spring24/MiniProject/Data/SubstanceCharacterisation_KJ_2022.csv'
)
# Bare expression: displays the frame when this is run as a notebook cell.
SubCharacter
| | Substance | has | Component | CASNumber | ECRefNo | MolecularFormula | smiles |
|---|---|---|---|---|---|---|---|
| 0 | ((E)-2-(2-hydroxymethylphenyl)-2-methoxyimino-... | as such | ((E)-2-(2-hydroxymethylphenyl)-2-methoxyimino-... | NaN | NaN | C11H14N2O3 | OCc1ccccc1/C(=N\OC)C(=O)NC |
| 1 | ((E)-2-(2-hydroxymethylphenyl)-2-methoxyimino-... | metabolite | Dimoxystrobin | 149961-52-4 | NaN | C19H22N2O3 | CC1=CC(=C(C=C1)C)OCC2=CC=CC=C2C(=NOC)C(=O)NC |
| 2 | (+)-13alpha-Tigloyloxylupanine | as such | (+)-13-alpha-Tigloyloxylupanine | 57943-34-7 | NaN | C20H30N2O3 | CC=C(C)C(=O)OC1CCN2CC3CC(C2C1)CN4C3CCCC4=O |
| 3 | (+)-13alpha-hydroxy-lupanine | as such | (+)-13-Alpha-hydroxy-lupanine | 15358-48-2 | NaN | C15H24N2O2 | C1CC2C3CC(CN2C(=O)C1)C4CC(CCN4C3)O |
| 4 | (+)-Alpha-cedrene | as such | (+)-Alpha-cedrene | 35964-52-4 | NaN | C15H24 | CC1=CCC23C[C@H]1C(C)(C)[C@H]3CC[C@@H]2C |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 7600 | trans-3-Hexenyl formate | as such | trans-3-Hexenyl formate | 56922-80-6 | 260-442-7 | C7H12O2 | CC\C=C\CCOC=O |
| 7601 | trans-3-Hexenyl hexanoate | as such | trans-3-Hexenyl hexanoate | 56922-82-8 | 260-444-8 | C12H22O2 | CCCCCC(=O)OCC\C=C\CC |
| 7602 | trans-3-Methyl-2-(2-pentenyl)-2-cyclopenten-1-one | as such | trans-3-Methyl-2-(2-pentenyl)-2-cyclopenten-1-one | 6261-18-3 | 228-410-7 | C11H16O | CC\C=C\CC1=C(C)CCC1=O |
| 7603 | trans-6-Methyl-3-hepten-2-one | as such | trans-6-Methyl-3-hepten-2-one | 20859-10-3 | NaN | C8H14O | CC(C)C/C=C/C(=O)C |
| 7604 | trans-Menthone | as such | trans-Menthone | 89-80-5 | 201-941-1 | C10H18O | CC(C)[C@H]1CC[C@H](C)CC1=O |
7605 rows × 7 columns
Reference Value: The estimated maximum dose (on a body mass basis) or the concentration of an agent to which an individual may be exposed over a specified period without appreciable risk.
# Load the raw reference-value table from the local project checkout.
ref_value = pd.read_csv(
    filepath_or_buffer='/Users/suongsuong/Documents/GitHub/Data400_Spring24/MiniProject/Data/ReferenceValues_KJ_2022.csv'
)
# Keep only the columns used downstream by removing the bookkeeping ones.
# NOTE(review): 'qualfier' is spelled exactly as in the original code;
# presumably it mirrors the CSV header - confirm before "fixing" it.
RefValue = ref_value.drop(['Author', 'Year', 'qualfier', 'Assessment'], axis=1)
# Bare expression: displays the frame when this is run as a notebook cell.
RefValue
| | Substance | OutputID | value | unit | Population |
|---|---|---|---|---|---|
| 0 | (-)-3,7-Dimethyl-6-octen-1-ol | 2180 | 30.0 | µg/kg bw/day | Consumers |
| 1 | (-)-3,7-Dimethyl-6-octen-1-ol | 2864 | 4.4 | mg/kg | Cats as pet |
| 2 | (-)-3,7-Dimethyl-6-octen-1-ol | 2864 | 8.0 | mg/kg | Chicken for egg production |
| 3 | (-)-3,7-Dimethyl-6-octen-1-ol | 2864 | 8.0 | mg/kg | Chicken for meat production |
| 4 | (-)-3,7-Dimethyl-6-octen-1-ol | 2864 | 10.0 | mg/kg | Pigs - less than 1 year old |
| ... | ... | ... | ... | ... | ... |
| 11052 | trans-Menthone | 2822 | 0.3 | mg/kg | Pigs |
| 11053 | trans-Menthone | 2822 | 0.3 | mg/kg | Poultry |
| 11054 | trans-Menthone | 2822 | 0.5 | mg/kg | Animal not used for food production - unspecified |
| 11055 | trans-Menthone | 2822 | 0.5 | mg/kg | Cattle |
| 11056 | trans-Menthone | 2822 | 0.5 | mg/kg | Salmons |
11057 rows × 5 columns
A reference point is like a starting point scientists use when they're figuring out how much of a substance can be harmful.
# Load the reference-point table from the local project checkout.
# Every column is kept: the column-dropping step is intentionally disabled
# (it would have removed Substance, Author, Year and qualifier).
RefPoint = pd.read_csv(
    filepath_or_buffer='/Users/suongsuong/Documents/GitHub/Data400_Spring24/MiniProject/Data/ReferencePoints_KJ_2022.csv'
)
# Bare expression: displays the frame when this is run as a notebook cell.
RefPoint
| | Substance | Author | Year | OutputID | Study | TestType | Species | Route | DurationDays | Endpoint | qualifier | value | unit | Effect | Toxicity |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | (-)-3,7-Dimethyl-6-octen-1-ol | EFSA FEEDAP | 2016 | 2864 | Animal (target species) health | subchronic | Rat | oral: feed | 84.0 | NOEL | = | 50.000 | mg/kg bw/day | no adverse effect observed | NaN |
| 1 | (-)-Hyoscyamine and (-)-Scopolamine group | EFSA CONTAM | 2013 | 2396 | Animal (target species) health | not reported | Pig | Not reported | NaN | NOAEL | = | 1500.000 | µg/kg | not reported | not reported |
| 2 | (-)-Hyoscyamine and (-)-Scopolamine group | EFSA CONTAM | 2013 | 2396 | Human health | study with volunteers | Human | Not reported | NaN | NOAEL | = | 0.160 | µg/kg bw | clinical signs | systemic |
| 3 | (1R,2S,5R)-N-(2-(Pyridine-2-yl)ethyl)-3-p-ment... | EFSA CEF | 2014 | 2524 | Human health | subchronic | Rat | oral: feed | 90.0 | NOAEL | = | 5.000 | mg/kg bw/day | histopathology non neoplastic | endocrine |
| 4 | (1R,2S,5R)-N-(4-Methoxyphenyl)-5-methyl-2-(1-m... | EFSA CEF | 2012 | 2147 | Human health | subchronic | Rat | oral: feed | 90.0 | NOAEL | = | 100.000 | mg/kg bw/day | organ weights | hepatotoxicity |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 13349 | p-Mentha-1,8-dien-7-ol | EFSA CEF | 2012 | 2130 | Human health | subchronic | Rat | oral: gavage | 90.0 | dose level | = | 120.000 | mg/kg bw/day | no adverse effect observed | NaN |
| 13350 | p-Mentha-1,8-dien-7-ol | EFSA CEF | 2013 | 2376 | Human health | subchronic | Dog | Not reported | 90.0 | dose level | = | 120.000 | mg/kg bw | no adverse effect observed | NaN |
| 13351 | p-Mentha-1,8-dien-7-ol | EFSA CEF | 2013 | 2376 | Human health | subchronic | Rat | oral: gavage | 90.0 | dose level | = | 120.000 | mg/kg bw/day | no adverse effect observed | NaN |
| 13352 | sec-Butan-3-onyl acetate | EFSA FEEDAP | 2016 | 2887 | Animal (target species) health | subchronic | Rat | oral: gavage | 90.0 | NOAEL | = | 90.000 | mg/kg bw/day | mortality | systemic |
| 13353 | tr-1-(2,6,6-Trimethyl-1-cyclohexen-1-yl)but-2-... | EFSA FEEDAP | 2016 | 2822 | Ecotox (water compartment) | acute toxicity | Green alga | Not reported | NaN | EC50 | = | 0.828 | mg/L | mortality | NaN |
13354 rows × 15 columns
Genotoxicity is the property of chemical agents that damage the genetic information within a cell causing mutations, which may lead to cancer
# Load the genotoxicity table from the local project checkout.
# Every column is kept: the column-dropping step is intentionally disabled
# (it would have removed Substance, Author and Year).
GenTox = pd.read_csv(
    filepath_or_buffer='/Users/suongsuong/Documents/GitHub/Data400_Spring24/MiniProject/Data/Genotoxicity_KJ_2022.csv'
)
# Bare expression: displays the frame when this is run as a notebook cell.
GenTox
| | Substance | Author | Year | OutputID | Genotoxicity |
|---|---|---|---|---|---|
| 0 | ((E)-2-(2-hydroxymethylphenyl)-2-methoxyimino-... | EFSA | 2005 | 1060 | Negative |
| 1 | (+)-13alpha-Tigloyloxylupanine | EFSA CONTAM | 2019 | 3321 | Negative |
| 2 | (+)-13alpha-hydroxy-lupanine | EFSA CONTAM | 2019 | 3321 | Negative |
| 3 | (+)-Alpha-cedrene | EFSA AFC | 2008 | 2299 | Not determined |
| 4 | (+)-Alpha-cedrene | EFSA CEF | 2010 | 2039 | Not determined |
| ... | ... | ... | ... | ... | ... |
| 10506 | trans-3-Methyl-2-(2-pentenyl)-2-cyclopenten-1-one | EFSA CEF | 2015 | 2654 | Negative |
| 10507 | trans-3-Methyl-2-(2-pentenyl)-2-cyclopenten-1-one | EFSA CEF | 2015 | 2693 | Negative |
| 10508 | trans-6-Methyl-3-hepten-2-one | EFSA CEF | 2012 | 2165 | Ambiguous |
| 10509 | trans-6-Methyl-3-hepten-2-one | EFSA FAF | 2019 | 3259 | Negative |
| 10510 | trans-Menthone | EFSA FEEDAP | 2016 | 2822 | Not determined |
10511 rows × 5 columns
# Exploratory profiling report for the substance-characterisation table.
# The title is shown in the rendered report, so the "Subtance" typo is fixed.
ProfileReport(
    SubCharacter,
    title='Substance Characterization Data Report',
    explorative=True,
)
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
# Exploratory profiling report for the reference-value table.
ProfileReport(
    RefValue,
    explorative=True,
    title='Reference Value Data Report',
)
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
# Exploratory profiling report for the reference-point table.
ProfileReport(
    RefPoint,
    explorative=True,
    title='Reference Point Data Report',
)
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
# Exploratory profiling report for the genotoxicity table.
ProfileReport(
    GenTox,
    explorative=True,
    title='Genotoxicity Data Report',
)
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
# Remove, in place, every row that has a missing value in any column of each
# table, then collapse exact duplicate rows in the reference-value table.
# NOTE(review): dropna() without `subset` also discards rows whose only gap
# is in a column the dashboard never reads (e.g. ECRefNo) - confirm that
# this is intended.
for _table in (SubCharacter, RefValue, RefPoint, GenTox):
    _table.dropna(inplace=True)
RefValue.drop_duplicates(inplace=True)
# Bare expression: shows GenTox's (rows, columns) when run as a notebook cell.
GenTox.shape
(10511, 5)
This part is not complete yet; the interface could be improved later.
# Create the Dash application object that the callback below registers on.
app = dash.Dash(__name__)

# Page layout: a centred title, a free-text box for the substance name, and
# an initially empty container that the callback fills with result panels.
app.layout = html.Div(
    children=[
        html.H1(
            children="Substance Information Dashboard",
            style={'textAlign': 'center', 'color': '#0074D9', 'marginBottom': '20px'},
        ),
        dcc.Input(
            id='substance-input',
            type='text',
            placeholder='Enter substance...',
            style={'width': '50%', 'margin': 'auto', 'textAlign': 'center'},
        ),
        html.Div(id='output-container', style={'margin': '20px'}),
    ],
    style={'fontFamily': 'Arial, sans-serif', 'maxWidth': '800px', 'margin': 'auto'},
)
# Define callback to update output
@app.callback(
    Output('output-container', 'children'),
    [Input('substance-input', 'value')]
)
def update_output(substance_input):
    """Build the result panels for the substance typed into the input box.

    The name is matched exactly against the ``Substance`` column of
    ``SubCharacter``, ``RefValue``, ``RefPoint`` and ``GenTox``; the first
    matching row of each table is shown. A table with no matching row
    yields a "not found" panel instead of raising.

    Args:
        substance_input: Current value of the ``substance-input`` box.
            ``None`` or ``''`` means nothing has been entered.

    Returns:
        A list of four components (chemical structure, reference value,
        reference point, genotoxicity) when a name was entered; otherwise a
        single "Substance not found" ``html.Div``.
    """
    not_found_style = {'color': '#FF4136'}

    # Guard clause: nothing typed yet (None) or the box was cleared ('').
    if not substance_input:
        return html.Div(
            "Substance not found",
            style={'color': '#FF4136', 'fontWeight': 'bold', 'textAlign': 'center'},
        )

    # Molecule
    # With the default (non-debounced) input the callback also runs for
    # partially typed names, so "no matching row" is the common case and
    # must not raise IndexError.
    selected_substance_structure = SubCharacter[SubCharacter['Substance'] == substance_input]
    mol = None
    if not selected_substance_structure.empty:
        smiles = selected_substance_structure['smiles'].values[0]
        # MolFromSmiles needs a string and returns None for SMILES it
        # cannot parse; both cases fall through to the "not found" panel.
        if isinstance(smiles, str):
            mol = Chem.MolFromSmiles(smiles)
    if mol is not None:
        img = Draw.MolToImage(mol, size=(300, 300))
        Return_Structure = html.Div([
            html.H3("Chemical Structure:"),
            html.Div([html.Img(src=img, style={'display': 'block', 'margin': 'auto'})])
        ])
    else:
        Return_Structure = html.Div("Chemical Structure not found", style=not_found_style)

    # RefValue
    selected_substance_refvalue = RefValue[RefValue['Substance'] == substance_input]
    if not selected_substance_refvalue.empty:
        refvalue = selected_substance_refvalue['value'].values[0]
        refunit = selected_substance_refvalue['unit'].values[0]
        Return_RefValue = html.Div([
            html.H3("Reference Value", style={'color': '#2ECC40'}),
            html.P("A reference value is like a safety limit for how much of a substance someone can be exposed to without significant risk.", style={'marginBottom': '10px'}),
            html.P(f"The reference value for {substance_input} is {refvalue} {refunit}", style={'fontWeight': 'bold'})
        ])
    else:
        Return_RefValue = html.Div("Reference Value not found", style=not_found_style)

    # RefPoint
    selected_substance_refpoint = RefPoint[RefPoint['Substance'] == substance_input]
    if not selected_substance_refpoint.empty:
        pointvalue = selected_substance_refpoint['value'].values[0]
        pointunit = selected_substance_refpoint['unit'].values[0]
        Return_RefPoint = html.Div([
            html.H3("Reference Point", style={'color': '#2ECC40'}),
            html.P("A reference point is like a starting point scientists use when they're figuring out how much of a substance can be harmful.", style={'marginBottom': '10px'}),
            html.P(f"The reference point for {substance_input} is {pointvalue} {pointunit}", style={'fontWeight': 'bold'})
        ])
    else:
        Return_RefPoint = html.Div("Reference Point not found", style=not_found_style)

    # Genotoxicity
    selected_substance_gentox = GenTox[GenTox['Substance'] == substance_input]
    if not selected_substance_gentox.empty:
        gentox = selected_substance_gentox['Genotoxicity'].values[0]
        Return_gentox = html.Div([
            html.H3("Genotoxicity", style={'color': '#2ECC40'}),
            html.P("Genotoxicity is the property of chemical agents that damage the genetic information within a cell causing mutations, which may lead to cancer.", style={'marginBottom': '10px'}),
            html.P(f"Genotoxicity for {substance_input} is {gentox}", style={'fontWeight': 'bold'})
        ])
    else:
        Return_gentox = html.Div("Genotoxicity not found", style=not_found_style)

    return [
        Return_Structure,
        Return_RefValue,
        Return_RefPoint,
        Return_gentox
    ]
# Start the Dash development server (debug mode on) only when this file is
# executed as a script, not when it is imported.
# NOTE(review): newer Dash releases favour `app.run` over `app.run_server` -
# confirm against the installed Dash version.
if __name__ == '__main__':
    app.run_server(debug=True)